From: Jeroen van der Heijden Date: Fri, 22 Jun 2018 14:41:01 +0000 (+0200) Subject: Added regular expressions X-Git-Tag: archive/raspbian/2.0.44-1+rpi1~1^2~3^2~9^2~10 X-Git-Url: https://dgit.raspbian.org/%22http://www.example.com/cgi/success//%22http:/www.example.com/cgi/success/?a=commitdiff_plain;h=136b1875c0ffce432b1980084ef2dfd51ac0853c;p=siridb-server.git Added regular expressions --- diff --git a/ChangeLog-2.0.29 b/ChangeLog-2.0.29 new file mode 100644 index 00000000..40eddec8 --- /dev/null +++ b/ChangeLog-2.0.29 @@ -0,0 +1 @@ + * Added filter log series by using a regular expressions. (issue #100) diff --git a/grammar/gogrammar/grammar.go b/grammar/gogrammar/grammar.go index 3f5f2d1e..eb74c392 100644 --- a/grammar/gogrammar/grammar.go +++ b/grammar/gogrammar/grammar.go @@ -4,7 +4,7 @@ package grammar // should be used with the goleri module. // // Source class: SiriGrammar -// Created at: 2018-06-14 16:27:16 +// Created at: 2018-06-22 15:10:04 import ( "regexp" @@ -1141,6 +1141,7 @@ func SiriGrammar() *goleri.Grammar { string, rInteger, rFloat, + rRegex, kNan, kInf, kNinf, diff --git a/grammar/grammar.py b/grammar/grammar.py index fb96d8d3..c01d780d 100644 --- a/grammar/grammar.py +++ b/grammar/grammar.py @@ -497,6 +497,7 @@ class SiriGrammar(Grammar): string, r_integer, r_float, + r_regex, k_nan, k_inf, k_ninf, diff --git a/include/siri/db/aggregate.h b/include/siri/db/aggregate.h index c758aeda..95ea715b 100644 --- a/include/siri/db/aggregate.h +++ b/include/siri/db/aggregate.h @@ -16,6 +16,7 @@ #include #include #include +#include typedef struct siridb_point_s siridb_point_t; typedef struct siridb_points_s siridb_points_t; @@ -29,6 +30,8 @@ typedef struct siridb_aggr_s uint64_t limit; uint64_t offset; double timespan; // used for derivative + pcre2_code * regex; \ + pcre2_match_data * match_data; qp_via_t filter_via; } siridb_aggr_t; diff --git a/include/siri/grammar/grammar.h b/include/siri/grammar/grammar.h index 2875b76c..bd6c2dc8 100644 --- a/include/siri/grammar/grammar.h +++ b/include/siri/grammar/grammar.h @@ -5,7 +5,7 @@ * should be used with the libcleri module. * * Source class: SiriGrammar - * Created at: 2018-06-14 16:27:16 + * Created at: 2018-06-22 15:10:04 */ #ifndef CLERI_EXPORT_SIRI_GRAMMAR_GRAMMAR_H_ #define CLERI_EXPORT_SIRI_GRAMMAR_GRAMMAR_H_ diff --git a/include/siri/parser/queries.h b/include/siri/parser/queries.h index 23de3313..e40bd45c 100644 --- a/include/siri/parser/queries.h +++ b/include/siri/parser/queries.h @@ -22,6 +22,7 @@ #include #include #include +#include #define QUERIES_IGNORE_DROP_THRESHOLD 1 #define QUERIES_SKIP_GET_POINTS 2 diff --git a/include/siri/version.h b/include/siri/version.h index bf227c3a..776eaf58 100644 --- a/include/siri/version.h +++ b/include/siri/version.h @@ -13,7 +13,7 @@ #define SIRIDB_VERSION_MAJOR 2 #define SIRIDB_VERSION_MINOR 0 -#define SIRIDB_VERSION_PATCH 28 +#define SIRIDB_VERSION_PATCH 29 #define SIRIDB_STRINGIFY(num) #num #define SIRIDB_VERSION_STR(major,minor,patch) \ diff --git a/src/siri/db/aggregate.c b/src/siri/db/aggregate.c index 898c0541..2fdfd78d 100644 --- a/src/siri/db/aggregate.c +++ b/src/siri/db/aggregate.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -51,6 +52,7 @@ typedef int (* AGGR_cb)( static AGGR_cb AGGREGATES[F_OFFSET]; static siridb_aggr_t * AGGREGATE_new(uint32_t gid); +static int AGGREGATE_regex_cmp(siridb_aggr_t * aggr, char * val); static void AGGREGATE_free(siridb_aggr_t * aggr); static int AGGREGATE_init_filter( siridb_aggr_t * aggr, @@ -536,8 +538,10 @@ static siridb_aggr_t * AGGREGATE_new(uint32_t gid) aggr->limit = 0; aggr->offset = 0; aggr->timespan = 1.0; - aggr->filter_tp = TP_INT; /* when string we must - * malloc/free * aggr->filter_via.raw */ + aggr->regex = NULL; + aggr->match_data = NULL; + aggr->filter_via.raw = NULL; + aggr->filter_tp = TP_INT; /* when string we must cleanup more */ return aggr; } @@ -549,6 +553,8 @@ static void AGGREGATE_free(siridb_aggr_t * aggr) if (aggr->filter_tp == TP_STRING) { free(aggr->filter_via.raw); + pcre2_code_free(aggr->regex); + pcre2_match_data_free(aggr->match_data); } free(aggr); } @@ -601,6 +607,27 @@ static int AGGREGATE_init_filter( (char *) aggr->filter_via.raw, node->str, node->len); return 0; + case CLERI_GID_R_REGEX: + if (aggr->filter_opr != CEXPR_EQ && aggr->filter_opr != CEXPR_NE) + { + sprintf(err_msg, + "Regular expressions can only be used with 'equal' (==) " + "or 'not equal' (!=) operator."); + return -1; + } + aggr->filter_tp = TP_STRING; + /* extract and compile regular expression */ + if (siridb_re_compile( + &aggr->regex, + &aggr->match_data, + node->str, + node->len, + err_msg)) + { + return -1; /* error_msg is set */ + } + return 0; + default: assert (0); break; @@ -778,6 +805,20 @@ static siridb_points_t * AGGREGATE_difference( return points; } +static int AGGREGATE_regex_cmp(siridb_aggr_t * aggr, char * val) +{ + int ret; + ret = pcre2_match( + aggr->regex, + (PCRE2_SPTR8) val, + strlen(val), + 0, // start looking at this point + 0, // OPTIONS + aggr->match_data, + 0); // length of sub_str_vec + return aggr->filter_opr == CEXPR_EQ ? ret >= 0 : ret < 0; +} + static siridb_points_t * AGGREGATE_filter( siridb_points_t * source, siridb_aggr_t * aggr, @@ -815,7 +856,6 @@ static siridb_points_t * AGGREGATE_filter( siridb_points_t * points = siridb_points_new(source->len, source->tp); - if (points == NULL) { sprintf(err_msg, "Memory allocation error."); @@ -832,7 +872,11 @@ static siridb_points_t * AGGREGATE_filter( i < source->len; i++, spt++) { - if (cexpr_str_cmp(aggr->filter_opr, spt->val.str, value.str)) + if (value.str != NULL // NULL is a regular expression + ? cexpr_str_cmp( + aggr->filter_opr, + spt->val.str, value.str) + : AGGREGATE_regex_cmp(aggr, spt->val.str)) { dpt->ts = spt->ts; dpt->val.str = strdup(spt->val.str); diff --git a/src/siri/grammar/grammar.c b/src/siri/grammar/grammar.c index 1ab80064..00d1ad45 100644 --- a/src/siri/grammar/grammar.c +++ b/src/siri/grammar/grammar.c @@ -5,7 +5,7 @@ * should be used with the libcleri module. * * Source class: SiriGrammar - * Created at: 2018-06-14 16:27:16 + * Created at: 2018-06-22 15:10:04 */ #include "siri/grammar/grammar.h" @@ -954,10 +954,11 @@ cleri_grammar_t * compile_grammar(void) cleri_choice( CLERI_NONE, CLERI_MOST_GREEDY, - 6, + 7, string, r_integer, r_float, + r_regex, k_nan, k_inf, k_ninf diff --git a/test/test_select.py b/test/test_select.py index 2ed866db..6ae91bd1 100644 --- a/test/test_select.py +++ b/test/test_select.py @@ -3,6 +3,7 @@ import functools import random import time import math +import re from testing import Client from testing import default_test_setup from testing import gen_data @@ -214,6 +215,21 @@ class TestSelect(TestBase): [1447253549, 538], [1447254748, 537]]}) + self.assertEqual( + await self.client0.query( + 'select filter(/l.*/) from * where type == string'), + {'log': [p for p in DATA['log'] if re.match('l.*', p[1])]}) + + self.assertEqual( + await self.client0.query( + 'select filter(==/l.*/) from * where type == string'), + {'log': [p for p in DATA['log'] if re.match('l.*', p[1])]}) + + self.assertEqual( + await self.client0.query( + 'select filter(!=/l.*/) from * where type == string'), + {'log': [p for p in DATA['log'] if not re.match('l.*', p[1])]}) + self.assertEqual( await self.client0.query('select limit(300, mean) from "aggr"'), {'aggr': DATA['aggr']}) @@ -322,6 +338,16 @@ class TestSelect(TestBase): await self.client0.query('select difference() from "one"'), {'one': []}) + with self.assertRaisesRegexp( + QueryError, + 'Regular expressions can only be used with.*'): + await self.client0.query('select filter(~//) from "log"') + + with self.assertRaisesRegexp( + QueryError, + 'Cannot use a string filter on number type.'): + await self.client0.query('select filter(//) from "aggr"') + with self.assertRaisesRegexp( QueryError, 'Cannot use mean\(\) on string type\.'):